}
skip_cr3:
- vmcb->cr4 = c->cr4 | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = c->cr4 | HVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = c->cr4;
vmcb->idtr.limit = c->idtr_limit;
/* update VMCB for nested paging restore */
if ( paging_mode_hap(v->domain) ) {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
vmcb->cr3 = c->cr3;
vmcb->np_enable = 1;
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
: : "a" (__pa(root_vmcb[cpu])) );
#ifdef __x86_64__
- /* Resume use of IST2 for NMIs now that the host TR is reinstated. */
- idt_tables[cpu][TRAP_nmi].a |= 2UL << 32;
+ /* Resume use of ISTs now that the host TR is reinstated. */
+ idt_tables[cpu][TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_tables[cpu][TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_tables[cpu][TRAP_machine_check].a |= 3UL << 32; /* IST3 */
#endif
}
set_segment_register(ss, 0);
/*
- * Cannot use IST2 for NMIs while we are running with the guest TR. But
- * this doesn't matter: the IST is only needed to handle SYSCALL/SYSRET.
+ * Cannot use ISTs for NMI/#MC/#DF while we are running with the guest TR.
+     * But this doesn't matter: the ISTs are only needed for SYSCALL/SYSRET.
*/
- idt_tables[cpu][TRAP_nmi].a &= ~(2UL << 32);
+ idt_tables[cpu][TRAP_double_fault].a &= ~(3UL << 32);
+ idt_tables[cpu][TRAP_nmi].a &= ~(3UL << 32);
+ idt_tables[cpu][TRAP_machine_check].a &= ~(3UL << 32);
#endif
svm_restore_dr(v);
break;
case 4: /* CR4 */
+ if ( value & ~mmu_cr4_features )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to enable unsupported "
+ "CR4 features %lx (host %lx)",
+ value, mmu_cr4_features);
+ svm_inject_exception(v, TRAP_gp_fault, 1, 0);
+ break;
+ }
+
if ( paging_mode_hap(v->domain) )
{
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ v->arch.hvm_svm.cpu_shadow_cr4 = value;
+ vmcb->cr4 = value | (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
paging_update_paging_modes(v);
/* signal paging update to ASID handler */
svm_asid_g_update_paging (v);
}
v->arch.hvm_svm.cpu_shadow_cr4 = value;
- vmcb->cr4 = value | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = value | HVM_CR4_HOST_MASK;
/*
* Writing to CR4 to modify the PSE, PGE, or PAE flag invalidates
vmcb->cr2 = 0;
vmcb->efer = EFER_SVME;
- vmcb->cr4 = SVM_CR4_HOST_MASK;
+ vmcb->cr4 = HVM_CR4_HOST_MASK;
v->arch.hvm_svm.cpu_shadow_cr4 = 0;
if ( paging_mode_hap(v->domain) ) {
vmcb->cr0 = v->arch.hvm_svm.cpu_shadow_cr0;
- vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4;
+ vmcb->cr4 = v->arch.hvm_svm.cpu_shadow_cr4 |
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
}
/* This will jump to ROMBIOS */
break;
}
+ case VMEXIT_EXCEPTION_MC:
+ HVMTRACE_0D(MCE, v);
+ svm_store_cpu_guest_regs(v, regs, NULL);
+ do_machine_check(regs);
+ break;
+
case VMEXIT_VINTR:
vmcb->vintr.fields.irq = 0;
vmcb->general1_intercepts &= ~GENERAL1_INTERCEPT_VINTR;
/* Guest CR4. */
arch_svm->cpu_shadow_cr4 =
read_cr4() & ~(X86_CR4_PGE | X86_CR4_PSE | X86_CR4_PAE);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 | SVM_CR4_HOST_MASK;
+ vmcb->cr4 = arch_svm->cpu_shadow_cr4 | HVM_CR4_HOST_MASK;
paging_update_paging_modes(v);
vmcb->cr3 = v->arch.hvm_vcpu.hw_cr3;
vmcb->np_enable = 1; /* enable nested paging */
vmcb->g_pat = 0x0007040600070406ULL; /* guest PAT */
vmcb->h_cr3 = pagetable_get_paddr(v->domain->arch.phys_table);
- vmcb->cr4 = arch_svm->cpu_shadow_cr4 = 0;
+ vmcb->cr4 = arch_svm->cpu_shadow_cr4 =
+ (HVM_CR4_HOST_MASK & ~X86_CR4_PAE);
+ vmcb->exception_intercepts = HVM_TRAP_MASK;
/* No point in intercepting CR0/3/4 reads, because the hardware
* will return the guest versions anyway. */
}
else
{
- vmcb->exception_intercepts = 1U << TRAP_page_fault;
+ vmcb->exception_intercepts = HVM_TRAP_MASK | (1U << TRAP_page_fault);
}
return 0;
__vmwrite(VMCS_LINK_POINTER_HIGH, ~0UL);
#endif
- __vmwrite(EXCEPTION_BITMAP, 1U << TRAP_page_fault);
+ __vmwrite(EXCEPTION_BITMAP, HVM_TRAP_MASK | (1U << TRAP_page_fault));
/* Guest CR0. */
cr0 = read_cr0();
}
#endif
- __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
else
HVM_DBG_LOG(DBG_LEVEL_VMMU, "Update CR3 value = %x", c->cr3);
- __vmwrite(GUEST_CR4, (c->cr4 | VMX_CR4_HOST_MASK));
+ __vmwrite(GUEST_CR4, (c->cr4 | HVM_CR4_HOST_MASK));
v->arch.hvm_vmx.cpu_shadow_cr4 = c->cr4;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
case 4: /* CR4 */
old_cr = v->arch.hvm_vmx.cpu_shadow_cr4;
+ if ( value & ~mmu_cr4_features )
+ {
+ HVM_DBG_LOG(DBG_LEVEL_1, "Guest attempts to enable unsupported "
+ "CR4 features %lx (host %lx)",
+ value, mmu_cr4_features);
+ vmx_inject_hw_exception(v, TRAP_gp_fault, 0);
+ break;
+ }
if ( (value & X86_CR4_PAE) && !(old_cr & X86_CR4_PAE) )
{
if ( vmx_pgbit_test(v) )
}
}
- __vmwrite(GUEST_CR4, value| VMX_CR4_HOST_MASK);
+ __vmwrite(GUEST_CR4, value | HVM_CR4_HOST_MASK);
v->arch.hvm_vmx.cpu_shadow_cr4 = value;
__vmwrite(CR4_READ_SHADOW, v->arch.hvm_vmx.cpu_shadow_cr4);
}
}
-static void vmx_failed_vmentry(unsigned int exit_reason)
+static void vmx_failed_vmentry(unsigned int exit_reason,
+ struct cpu_user_regs *regs)
{
unsigned int failed_vmentry_reason = (uint16_t)exit_reason;
unsigned long exit_qualification;
break;
case EXIT_REASON_MACHINE_CHECK:
printk("caused by machine check.\n");
+ HVMTRACE_0D(MCE, current);
+ vmx_store_cpu_guest_regs(current, regs, NULL);
+ do_machine_check(regs);
break;
default:
printk("reason not known yet!");
local_irq_enable();
if ( unlikely(exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY) )
- return vmx_failed_vmentry(exit_reason);
+ return vmx_failed_vmentry(exit_reason, regs);
switch ( exit_reason )
{
vmx_inject_hw_exception(v, TRAP_page_fault, regs->error_code);
break;
case TRAP_nmi:
- HVMTRACE_0D(NMI, v);
if ( (intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI )
+ {
+ HVMTRACE_0D(NMI, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
do_nmi(regs); /* Real NMI, vector 2: normal processing. */
+ }
else
vmx_reflect_exception(v);
break;
+ case TRAP_machine_check:
+ HVMTRACE_0D(MCE, v);
+ vmx_store_cpu_guest_regs(v, regs, NULL);
+ do_machine_check(regs);
+ break;
default:
goto exit_and_crash;
}
asmlinkage int do_ ## _name(struct cpu_user_regs *regs)
asmlinkage void nmi(void);
+asmlinkage void machine_check(void);
DECLARE_TRAP_HANDLER(divide_error);
DECLARE_TRAP_HANDLER(debug);
DECLARE_TRAP_HANDLER(int3);
DECLARE_TRAP_HANDLER(simd_coprocessor_error);
DECLARE_TRAP_HANDLER(alignment_check);
DECLARE_TRAP_HANDLER(spurious_interrupt_bug);
-DECLARE_TRAP_HANDLER(machine_check);
long do_set_debugreg(int reg, unsigned long value);
unsigned long do_get_debugreg(int reg);
return do_guest_trap(TRAP_int3, regs, 0);
}
-asmlinkage int do_machine_check(struct cpu_user_regs *regs)
+asmlinkage void do_machine_check(struct cpu_user_regs *regs)
{
- fatal_trap(TRAP_machine_check, regs);
- return 0;
+ extern fastcall void (*machine_check_vector)(
+ struct cpu_user_regs *, long error_code);
+ machine_check_vector(regs, regs->error_code);
}
void propagate_page_fault(unsigned long addr, u16 error_code)
movw $TRAP_page_fault,2(%esp)
jmp handle_exception
-ENTRY(machine_check)
- pushl $TRAP_machine_check<<16
- jmp handle_exception
-
ENTRY(spurious_interrupt_bug)
pushl $TRAP_spurious_int<<16
jmp handle_exception
addl $4,%esp
jmp restore_all_xen
-ENTRY(nmi)
+handle_nmi_mce:
#ifdef CONFIG_X86_SUPERVISOR_MODE_KERNEL
- # NMI entry protocol is incompatible with guest kernel in ring 0.
+ # NMI/MCE entry protocol is incompatible with guest kernel in ring 0.
+ addl $4,%esp
iret
#else
# Save state but do not trash the segment registers!
- pushl $TRAP_nmi<<16
- SAVE_ALL(.Lnmi_xen,.Lnmi_common)
-.Lnmi_common:
- movl %esp,%eax
- pushl %eax
- call do_nmi
+ SAVE_ALL(.Lnmi_mce_xen,.Lnmi_mce_common)
+.Lnmi_mce_common:
+ xorl %eax,%eax
+ movw UREGS_entry_vector(%esp),%ax
+ movl %esp,%edx
+ pushl %edx
+ call *exception_table(,%eax,4)
addl $4,%esp
/*
* NB. We may return to Xen context with polluted %ds/%es. But in such
* be detected by SAVE_ALL(), or we have rolled back restore_guest.
*/
jmp ret_from_intr
-.Lnmi_xen:
+.Lnmi_mce_xen:
/* Check the outer (guest) context for %ds/%es state validity. */
GET_GUEST_REGS(%ebx)
testl $X86_EFLAGS_VM,%ss:UREGS_eflags(%ebx)
mov %ds,%eax
mov %es,%edx
- jnz .Lnmi_vm86
+ jnz .Lnmi_mce_vm86
/* We may have interrupted Xen while messing with %ds/%es... */
cmpw %ax,%cx
mov %ecx,%ds /* Ensure %ds is valid */
movl $.Lrestore_sregs_guest,%ecx
movl %edx,UREGS_es(%ebx) /* Ensure guest frame contains guest ES */
cmpl %ecx,UREGS_eip(%esp)
- jbe .Lnmi_common
+ jbe .Lnmi_mce_common
cmpl $.Lrestore_iret_guest,UREGS_eip(%esp)
- ja .Lnmi_common
+ ja .Lnmi_mce_common
/* Roll outer context restore_guest back to restoring %ds/%es. */
movl %ecx,UREGS_eip(%esp)
- jmp .Lnmi_common
-.Lnmi_vm86:
+ jmp .Lnmi_mce_common
+.Lnmi_mce_vm86:
/* vm86 is easy: the CPU saved %ds/%es so we can safely stomp them. */
mov %ecx,%ds
mov %ecx,%es
- jmp .Lnmi_common
+ jmp .Lnmi_mce_common
#endif /* !CONFIG_X86_SUPERVISOR_MODE_KERNEL */
+ENTRY(nmi)
+ pushl $TRAP_nmi<<16
+ jmp handle_nmi_mce
+
+ENTRY(machine_check)
+ pushl $TRAP_machine_check<<16
+ jmp handle_nmi_mce
+
ENTRY(setup_vm86_frame)
mov %ecx,%ds
mov %ecx,%es
ENTRY(exception_table)
.long do_divide_error
.long do_debug
- .long 0 # nmi
+ .long do_nmi
.long do_int3
.long do_overflow
.long do_bounds
movl $TRAP_page_fault,4(%rsp)
jmp handle_exception
-ENTRY(machine_check)
- pushq $0
- movl $TRAP_machine_check,4(%rsp)
- jmp handle_exception
-
ENTRY(spurious_interrupt_bug)
pushq $0
movl $TRAP_spurious_int,4(%rsp)
call do_early_page_fault
jmp restore_all_xen
-ENTRY(nmi)
- pushq $0
+handle_ist_exception:
SAVE_ALL
testb $3,UREGS_cs(%rsp)
- jz nmi_in_hypervisor_mode
+ jz 1f
/* Interrupted guest context. Copy the context to stack bottom. */
- GET_GUEST_REGS(%rbx)
+ GET_GUEST_REGS(%rdi)
+ movq %rsp,%rsi
movl $UREGS_kernel_sizeof/8,%ecx
-1: popq %rax
- movq %rax,(%rbx)
- addq $8,%rbx
- loop 1b
- subq $UREGS_kernel_sizeof,%rbx
- movq %rbx,%rsp
-nmi_in_hypervisor_mode:
- movq %rsp,%rdi
- call do_nmi
+ movq %rdi,%rsp
+ rep movsq
+1: movq %rsp,%rdi
+ movl UREGS_entry_vector(%rsp),%eax
+ leaq exception_table(%rip),%rdx
+ callq *(%rdx,%rax,8)
jmp ret_from_intr
+ENTRY(nmi)
+ pushq $0
+ movl $TRAP_nmi,4(%rsp)
+ jmp handle_ist_exception
+
+ENTRY(machine_check)
+ pushq $0
+ movl $TRAP_machine_check,4(%rsp)
+ jmp handle_ist_exception
+
.data
ENTRY(exception_table)
.quad do_divide_error
.quad do_debug
- .quad 0 # nmi
+ .quad do_nmi
.quad do_int3
.quad do_overflow
.quad do_bounds
{
/* Specify dedicated interrupt stacks for NMIs and double faults. */
set_intr_gate(TRAP_double_fault, &double_fault);
- idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
- idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_table[TRAP_double_fault].a |= 1UL << 32; /* IST1 */
+ idt_table[TRAP_nmi].a |= 2UL << 32; /* IST2 */
+ idt_table[TRAP_machine_check].a |= 3UL << 32; /* IST3 */
/*
* The 32-on-64 hypercall entry vector is only accessible from ring 1.
stack_bottom = (char *)get_stack_bottom();
stack = (char *)((unsigned long)stack_bottom & ~(STACK_SIZE - 1));
- /* Double-fault handler has its own per-CPU 2kB stack. */
+ /* Machine Check handler has its own per-CPU 1kB stack. */
+ init_tss[cpu].ist[2] = (unsigned long)&stack[1024];
+
+ /* Double-fault handler has its own per-CPU 1kB stack. */
init_tss[cpu].ist[0] = (unsigned long)&stack[2048];
/* NMI handler has its own per-CPU 1kB stack. */
return hvm_funcs.event_injection_faulted(v);
}
+/* These bits in the CR4 are owned by the host */
+#define HVM_CR4_HOST_MASK (mmu_cr4_features & \
+ (X86_CR4_VMXE | X86_CR4_PAE | X86_CR4_MCE))
+
+/* These exceptions must always be intercepted. */
+#define HVM_TRAP_MASK (1U << TRAP_machine_check)
+
#endif /* __ASM_X86_HVM_HVM_H__ */
void setup_vmcb_dump(void);
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define SVM_CR4_HOST_MASK (X86_CR4_PAE)
-#else
-#define SVM_CR4_HOST_MASK 0
-#endif
-
-
#endif /* ASM_X86_HVM_SVM_VMCS_H__ */
/*
#define DO_TRC_HVM_CPUID 1
#define DO_TRC_HVM_INTR 1
#define DO_TRC_HVM_NMI 1
+#define DO_TRC_HVM_MCE 1
#define DO_TRC_HVM_SMI 1
#define DO_TRC_HVM_VMMCALL 1
#define DO_TRC_HVM_HLT 1
#define X86_SEG_AR_GRANULARITY (1u << 15) /* 15, granularity */
#define X86_SEG_AR_SEG_UNUSABLE (1u << 16) /* 16, segment unusable */
-/* These bits in the CR4 are owned by the host */
-#if CONFIG_PAGING_LEVELS >= 3
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE | X86_CR4_PAE)
-#else
-#define VMX_CR4_HOST_MASK (X86_CR4_VMXE)
-#endif
-
#define VMCALL_OPCODE ".byte 0x0f,0x01,0xc1\n"
#define VMCLEAR_OPCODE ".byte 0x66,0x0f,0xc7\n" /* reg/opcode: /6 */
#define VMLAUNCH_OPCODE ".byte 0x0f,0x01,0xc2\n"
extern void mtrr_ap_init(void);
extern void mtrr_bp_init(void);
-extern void mcheck_init(struct cpuinfo_x86 *c);
+void mcheck_init(struct cpuinfo_x86 *c);
+asmlinkage void do_machine_check(struct cpu_user_regs *regs);
int cpuid_hypervisor_leaves(
uint32_t idx, uint32_t *eax, uint32_t *ebx, uint32_t *ecx, uint32_t *edx);
#define TRC_HVM_VMMCALL (TRC_HVM_HANDLER + 0x12)
#define TRC_HVM_HLT (TRC_HVM_HANDLER + 0x13)
#define TRC_HVM_INVLPG (TRC_HVM_HANDLER + 0x14)
+#define TRC_HVM_MCE (TRC_HVM_HANDLER + 0x15)
/* This structure represents a single trace buffer record. */
struct t_rec {